This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

library(plotly)
library(ggplot2)
library(dplyr)
library(magrittr)
# Load the movie data, then inspect it: per-column summary, counts of each
# clean_test category, and the column types.
# (A stray "]" after summary(movie) was removed; it is a parse error.)
movie <- read.csv("C:/Users/broadwmc/Desktop/movie.csv")
summary(movie)
table(movie$clean_test)
str(movie)
'data.frame':   1794 obs. of  15 variables:
 $ year          : int  2013 2012 2013 2013 2013 2013 2013 2013 2013 2013 ...
 $ imdb          : Factor w/ 1794 levels "tt00293564","tt0035423",..: 1707 1551 1769 1510 1099 1546 1667 1781 1732 1733 ...
 $ title         : Factor w/ 1768 levels "(500) Days of Summer",..: 15 396 4 11 25 26 41 55 63 66 ...
 $ test          : Factor w/ 10 levels "dubious","dubious-disagree",..: 5 10 6 5 3 3 5 10 9 5 ...
 $ clean_test    : Factor w/ 5 levels "dubious","men",..: 3 5 3 3 2 2 3 5 5 3 ...
 $ binary        : Factor w/ 2 levels "FAIL","PASS": 1 2 1 1 1 1 1 2 2 1 ...
 $ budget        : int  13000000 45000000 20000000 61000000 40000000 225000000 92000000 12000000 13000000 130000000 ...
 $ domgross      : Factor w/ 1751 levels "#N/A","0","100012499",..: 733 257 1296 1574 1729 1050 1485 347 471 1404 ...
 $ intgross      : Factor w/ 1757 levels "#N/A","100179814",..: 1102 1078 338 223 1711 285 846 1655 442 670 ...
 $ code          : Factor w/ 85 levels "1970PASS","1971FAIL",..: 84 83 84 84 84 84 84 85 85 84 ...
 $ budget_2013.  : int  13000000 45658735 20000000 61000000 40000000 225000000 92000000 12000000 13000000 130000000 ...
 $ domgross_2013.: Factor w/ 1776 levels "#N/A","100004670",..: 712 217 1261 1545 1739 1014 1470 321 435 1376 ...
 $ intgross_2013.: Factor w/ 1783 levels "#N/A","1004550114",..: 1078 1062 343 216 1738 279 837 1669 430 664 ...
 $ period.code   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ decade.code   : int  1 1 1 1 1 1 1 1 1 1 ...
# Data transforming
# The four gross columns are read as text (factors here) because missing values
# are recorded as "#N/A". as.integer() on a factor returns the internal level
# codes, not the amounts, so convert through character instead. as.numeric()
# is used so that very large amounts are not lost to integer overflow.
gross_cols <- c("domgross", "intgross", "domgross_2013.", "intgross_2013.")
movie[gross_cols] <- lapply(movie[gross_cols], function(col) {
  values <- as.character(col)
  values[values %in% "#N/A"] <- NA_character_  # explicit NA, no coercion warning
  as.numeric(values)
})
# Simple dplyr piping %>% example
# %>% passes the value on its left as the first argument of the call on its
# right: keep three columns, then group the rows by year.
select(movie, year, title, code) %>% group_by(year)
# What percentage of all films pass and fail the Bechdel test?
## PASS = 803/1794 = 44.76% and FAIL = 991/1794 = 55.24%
# Tally `binary` directly and derive the share from the counts. Wrapping the
# count() result in table() cross-tabulates the `binary` and `n` columns and
# only produces a 0/1 matrix.
movie %>%
  count(binary) %>%
  mutate(percent = round(100 * n / sum(n), 2))
      n
binary 803 991
  FAIL   0   1
  PASS   1   0
# What percentage of films released after 2000 pass and fail the Bechdel test?
## PASS = 577/1215 = 47% and FAIL = 638/1215 = 53%
# `year` is an integer column, so compare it with a number; comparing with the
# string "2000" forces a character comparison.
movie %>%
  filter(year > 2000) %>%
  count(binary) %>%
  mutate(percent = round(100 * n / sum(n), 2))
      n
binary 577 638
  FAIL   0   1
  PASS   1   0
# Histogram of the number of films per `binary` value (FAIL / PASS),
# colored by the categorical variable `clean_test`
movie %>%
  plot_ly(x = ~binary, color = ~clean_test) %>%
  add_histogram()
# Histograms faceted by 'clean_test'
# one_plot(d): build one histogram panel of `budget` for the data frame `d`
# and label it with the clean_test value(s) found in `d`.
#   d: data frame with `budget` and `clean_test` columns.
#   Returns a plotly object.
one_plot <- function(d) {
  plot_ly(d, x = ~budget) %>%
    # Name the trace type explicitly instead of letting plotly infer it, which
    # prints a "No trace type specified" message for every panel.
    add_histogram() %>%
    # Paper coordinates place the label at the top centre of the panel.
    add_annotations(
      ~unique(clean_test), x = 0.5, y = 1,
      xref = "paper", yref = "paper", showarrow = FALSE
    )
}
# One panel per clean_test category, arranged on three rows with a shared
# x axis and no legend.
split(movie, movie$clean_test) %>%
  lapply(one_plot) %>%
  subplot(nrows = 3, shareX = TRUE) %>%
  hide_legend()
No trace type specified:
  Based on info supplied, a 'histogram' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#histogram
No trace type specified:
  Based on info supplied, a 'histogram' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#histogram
No trace type specified:
  Based on info supplied, a 'histogram' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#histogram
No trace type specified:
  Based on info supplied, a 'histogram' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#histogram
No trace type specified:
  Based on info supplied, a 'histogram' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#histogram
# Scatterplot of budget_2013. (x) by domgross (y), colored by clean_test
## The `text` attribute adds each film's title and year to the hover label
movie %>%
  plot_ly(
    x = ~budget_2013.,
    y = ~domgross,
    color = ~clean_test,
    text = ~paste("Title: ", title, "<br>Year:", year),
    type = "scatter",
    mode = "markers",
    marker = list(size = 5, opacity = 0.5)
  )
# 3D scatterplot: the same markers with intgross on the z axis
movie %>%
  plot_ly(
    x = ~budget_2013.,
    y = ~domgross,
    z = ~intgross,
    color = ~clean_test,
    text = ~paste("Title: ", title, "<br>Year:", year),
    type = "scatter3d",
    mode = "markers",
    marker = list(size = 5, opacity = 0.5)
  )
# Donut chart of movies (2008-2013) that pass and fail the Bechdel Test
# Rows are filtered with a numeric comparison (year is an integer column)
# before counting the films per year/result `code`.
movie %>%
  filter(year > 2007) %>%
  group_by(code) %>%
  summarise(count = n()) %>%
  plot_ly(labels = ~code, values = ~count) %>%
  add_pie(hole = 0.6) %>%  # hole > 0 turns the pie into a donut
  layout(
    title = "2008-2013 Film Bechdel Test Pass/Fail",
    showlegend = FALSE,  # FALSE, not F: F is an ordinary, reassignable variable
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )
# Boxplot of budget_2013. by year/result code for films released after 2008
## Factor by code year
## +Select: cleantest and binary. Change: plot_ly(x = ~movie$binary, y = ~movie$budget_2013., color = ~movie$clean_test, type = "box") %>% layout(boxmode = "group")
# Level order for `code` (PASS before FAIL within each year).
code_levels <- c(
  "2008PASS", "2008FAIL", "2009PASS", "2009FAIL", "2010PASS", "2010FAIL",
  "2011PASS", "2011FAIL", "2012PASS", "2012FAIL", "2013PASS", "2013FAIL"
)
# Re-level `code` inside the pipeline only: assigning the re-levelled factor
# back to movie$code would turn every code outside `code_levels` into NA for
# everything that runs afterwards. `year` is compared with a number because it
# is an integer column.
movie %>%
  filter(year > 2008) %>%
  mutate(code = factor(code, levels = code_levels)) %>%
  plot_ly(y = ~budget_2013., color = ~code, type = "box", colors = "Set3")

# Line graph of budget_2013. by year, one line per `binary` value
plot_ly(movie, x = ~year, y = ~budget_2013.) %>%
  add_lines(color = ~binary) %>%
  layout(
    yaxis = list(title = "budget"),
    xaxis = list(title = "year")
  )
minimal value for n is 3, returning requested palette with 3 different levels
minimal value for n is 3, returning requested palette with 3 different levels
# Line graph of budget_2013. by year, one line per clean_test category
# No grouping step is needed: the `color` mapping already separates the
# categories. The former group_by()/transform(id = ...) steps were dead code,
# since base transform() returns an ungrouped data frame and `id` was never used.
movie %>%
  plot_ly(
    x = ~year, y = ~budget_2013., color = ~clean_test, colors = "Dark2",
    text = ~paste("Title: ", title, "<br>Year:", year)
  ) %>%
  add_lines() %>%
  layout(yaxis = list(title = "budget"))
# Animated bubble chart built with ggplot2 from the gapminder dataset
# `frame` and `ids` are not ggplot2 aesthetics (ggplot2 reports that it ignores
# them); they are supplied for ggplotly().
data("gapminder", package = "gapminder")
gg <- gapminder %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point(aes(size = pop, frame = year, ids = country)) +
  scale_x_log10()
Ignoring unknown aesthetics: frame, ids
# Convert the ggplot object into an interactive plotly figure
ggplotly(p = gg)
We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`
# More complex dplyr. Use either top_n function or filter(budget_2013. == max(budget_2013.))
# For each year after 2000, keep the five rows with the largest budget_2013.
# (top_n() keeps ties). The weight column is named explicitly rather than
# relying on top_n() picking the last column, and `year` is compared with a
# number because it is an integer column.
topfive <- movie %>%
  filter(year > 2000) %>%
  select(title, year, binary, clean_test, budget_2013.) %>%
  group_by(year) %>%
  top_n(n = 5, wt = budget_2013.) %>%
  as.data.frame()
Selecting by budget_2013.
# Creating a data frame for the 'bottom five' (smallest budget) per year
# A negative n makes top_n() keep the rows with the smallest values (ties
# included). The weight column is named explicitly rather than relying on
# top_n() picking the last column, and `year` is compared with a number
# because it is an integer column.
btmfive <- movie %>%
  filter(year > 2000) %>%
  select(title, year, binary, clean_test, budget_2013.) %>%
  group_by(year) %>%
  top_n(n = -5, wt = budget_2013.) %>%
  as.data.frame()
Selecting by budget_2013.
# As in the gapminder example, plotly can turn an existing ggplot2
# visualization into a plotly visualization.
# Stacked bars of pass/fail counts for the 'top five' films, one facet per
# year; `text` carries the film title for ggplotly().
top <- ggplot(
  topfive,
  aes(x = binary, fill = clean_test, text = paste("title:", title))
) +
  geom_bar() +
  facet_grid(. ~ year) +
  scale_fill_brewer(palette = "OrRd") +
  theme(axis.text.x = element_text(angle = 45))
ggplotly(p = top)
We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`
# Apply ggplotly to the 'bottom five' films
# Stacked bars of pass/fail counts, one facet per year; `text` carries the
# film title for ggplotly().
bottom <- ggplot(
  btmfive,
  aes(x = binary, fill = clean_test, text = paste("title:", title))
) +
  geom_bar() +
  facet_grid(. ~ year) +
  scale_fill_brewer(palette = "BuGn") +
  theme(axis.text.x = element_text(angle = 45))
ggplotly(p = bottom)
We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

---
title: "R Plotly Visualization"
output: html_notebook
---

This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 

Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*. 

```{r}
library(plotly)
library(ggplot2)
library(dplyr)
library(magrittr)
```

```{r}
# Load the movie data from a CSV file (absolute path on the author's machine).
# header = TRUE and sep = "," are read.csv()'s defaults, spelled out here.
movie <- read.csv(
  file = "C:/Users/broadwmc/Desktop/movie.csv",
  header = TRUE,
  sep = ","
)
```

```{r}
# Per-column summary of the movie data.
# (A stray "]" after this call was removed; it made the chunk fail to parse.)
summary(movie)
```

```{r}
# Number of films in each clean_test category
table(movie[["clean_test"]])
```

```{r}
# Compact overview of the data frame: dimensions, column types, first values
str(object = movie)
```

```{r}
# Data transforming
# The four gross columns are read as text (factor or character, depending on
# the R version) because missing values are recorded as "#N/A". as.integer()
# on a factor returns the internal level codes, not the amounts, so convert
# through character instead. as.numeric() is used so that very large amounts
# are not lost to integer overflow.
gross_cols <- c("domgross", "intgross", "domgross_2013.", "intgross_2013.")
movie[gross_cols] <- lapply(movie[gross_cols], function(col) {
  values <- as.character(col)
  values[values %in% "#N/A"] <- NA_character_  # explicit NA, no coercion warning
  as.numeric(values)
})
```

```{r}
# Simple dplyr piping %>% example
# %>% passes the value on its left as the first argument of the call on its
# right: keep three columns, then group the rows by year.
select(movie, year, title, code) %>% group_by(year)
```

```{r}

# What percentage of all films pass and fail the Bechdel test?
## PASS = 803/1794 = 44.76% and FAIL = 991/1794 = 55.24%
# Tally `binary` directly and derive the share from the counts. Wrapping the
# count() result in table() cross-tabulates the `binary` and `n` columns and
# only produces a 0/1 matrix.
movie %>%
  count(binary) %>%
  mutate(percent = round(100 * n / sum(n), 2))
```

```{r}
# What percentage of films released after 2000 pass and fail the Bechdel test?
## PASS = 577/1215 = 47% and FAIL = 638/1215 = 53%
# `year` is an integer column, so compare it with a number; comparing with the
# string "2000" forces a character comparison.
movie %>%
  filter(year > 2000) %>%
  count(binary) %>%
  mutate(percent = round(100 * n / sum(n), 2))
```

```{r}
# Histogram of the number of films per `binary` value (FAIL / PASS),
# colored by the categorical variable `clean_test`
movie %>%
  plot_ly(x = ~binary, color = ~clean_test) %>%
  add_histogram()

```

```{r}

# Histograms faceted by 'clean_test'
# one_plot(d): build one histogram panel of `budget` for the data frame `d`
# and label it with the clean_test value(s) found in `d`.
#   d: data frame with `budget` and `clean_test` columns.
#   Returns a plotly object.
one_plot <- function(d) {
  plot_ly(d, x = ~budget) %>%
    # Name the trace type explicitly instead of letting plotly infer it, which
    # prints a "No trace type specified" message for every panel.
    add_histogram() %>%
    # Paper coordinates place the label at the top centre of the panel.
    add_annotations(
      ~unique(clean_test), x = 0.5, y = 1,
      xref = "paper", yref = "paper", showarrow = FALSE
    )
}

# One panel per clean_test category, arranged on three rows with a shared
# x axis and no legend.
split(movie, movie$clean_test) %>%
  lapply(one_plot) %>%
  subplot(nrows = 3, shareX = TRUE) %>%
  hide_legend()
```

```{r}
# Scatterplot of budget_2013. (x) by domgross (y), colored by clean_test
## The `text` attribute adds each film's title and year to the hover label
movie %>%
  plot_ly(
    x = ~budget_2013.,
    y = ~domgross,
    color = ~clean_test,
    text = ~paste("Title: ", title, "<br>Year:", year),
    type = "scatter",
    mode = "markers",
    marker = list(size = 5, opacity = 0.5)
  )

```

```{r}
# 3D scatterplot: budget_2013. (x) by domgross (y) by intgross (z),
# colored by clean_test, with title and year in the hover text
movie %>%
  plot_ly(
    x = ~budget_2013.,
    y = ~domgross,
    z = ~intgross,
    color = ~clean_test,
    text = ~paste("Title: ", title, "<br>Year:", year),
    type = "scatter3d",
    mode = "markers",
    marker = list(size = 5, opacity = 0.5)
  )
```

```{r}
# Donut chart of movies (2008-2013) that pass and fail the Bechdel Test
# Rows are filtered with a numeric comparison (year is an integer column)
# before counting the films per year/result `code`.
movie %>%
  filter(year > 2007) %>%
  group_by(code) %>%
  summarise(count = n()) %>%
  plot_ly(labels = ~code, values = ~count) %>%
  add_pie(hole = 0.6) %>%  # hole > 0 turns the pie into a donut
  layout(
    title = "2008-2013 Film Bechdel Test Pass/Fail",
    showlegend = FALSE,  # FALSE, not F: F is an ordinary, reassignable variable
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )
```

```{r}
# Boxplot of budget_2013. by year/result code for films released after 2008
## Factor by code year
## +Select: cleantest and binary. Change: plot_ly(x = ~movie$binary, y = ~movie$budget_2013., color = ~movie$clean_test, type = "box") %>% layout(boxmode = "group")

# Level order for `code` (PASS before FAIL within each year).
code_levels <- c(
  "2008PASS", "2008FAIL", "2009PASS", "2009FAIL", "2010PASS", "2010FAIL",
  "2011PASS", "2011FAIL", "2012PASS", "2012FAIL", "2013PASS", "2013FAIL"
)

# Re-level `code` inside the pipeline only: assigning the re-levelled factor
# back to movie$code would turn every code outside `code_levels` into NA for
# everything that runs afterwards. `year` is compared with a number because it
# is an integer column.
movie %>%
  filter(year > 2008) %>%
  mutate(code = factor(code, levels = code_levels)) %>%
  plot_ly(y = ~budget_2013., color = ~code, type = "box", colors = "Set3")

# Line graph of budget_2013. by year, one line per `binary` value
plot_ly(movie, x = ~year, y = ~budget_2013.) %>%
  add_lines(color = ~binary) %>%
  layout(
    yaxis = list(title = "budget"),
    xaxis = list(title = "year")
  )
```

```{r}
# Line graph of budget_2013. by year, one line per clean_test category
# No grouping step is needed: the `color` mapping already separates the
# categories. The former group_by()/transform(id = ...) steps were dead code,
# since base transform() returns an ungrouped data frame and `id` was never used.
movie %>%
  plot_ly(
    x = ~year, y = ~budget_2013., color = ~clean_test, colors = "Dark2",
    text = ~paste("Title: ", title, "<br>Year:", year)
  ) %>%
  add_lines() %>%
  layout(yaxis = list(title = "budget"))
```

```{r}
# Animated bubble chart built with ggplot2 from the gapminder dataset
# `frame` and `ids` are not ggplot2 aesthetics; they are supplied for
# ggplotly(), which converts the plot into a plotly figure.
data("gapminder", package = "gapminder")
gg <- gapminder %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point(aes(size = pop, frame = year, ids = country)) +
  scale_x_log10()
ggplotly(p = gg)
```

```{r}
# More complex dplyr. Use either top_n function or filter(budget_2013. == max(budget_2013.))
# For each year after 2000, keep the five rows with the largest budget_2013.
# (top_n() keeps ties). The weight column is named explicitly rather than
# relying on top_n() picking the last column, and `year` is compared with a
# number because it is an integer column.
topfive <- movie %>%
  filter(year > 2000) %>%
  select(title, year, binary, clean_test, budget_2013.) %>%
  group_by(year) %>%
  top_n(n = 5, wt = budget_2013.) %>%
  as.data.frame()
```

```{r}
# Creating a data frame for the 'bottom five' (smallest budget) per year
# A negative n makes top_n() keep the rows with the smallest values (ties
# included). The weight column is named explicitly rather than relying on
# top_n() picking the last column, and `year` is compared with a number
# because it is an integer column.
btmfive <- movie %>%
  filter(year > 2000) %>%
  select(title, year, binary, clean_test, budget_2013.) %>%
  group_by(year) %>%
  top_n(n = -5, wt = budget_2013.) %>%
  as.data.frame()

```

```{r}
# As in the gapminder example, plotly can turn an existing ggplot2
# visualization into a plotly visualization.
# Stacked bars of pass/fail counts for the 'top five' films, one facet per
# year; `text` carries the film title for ggplotly().
top <- ggplot(
  topfive,
  aes(x = binary, fill = clean_test, text = paste("title:", title))
) +
  geom_bar() +
  facet_grid(. ~ year) +
  scale_fill_brewer(palette = "OrRd") +
  theme(axis.text.x = element_text(angle = 45))
```

```{r}
# Render the 'top five' ggplot as an interactive plotly figure
ggplotly(p = top)
```

```{r}
# Same chart for the 'bottom five' films
# Stacked bars of pass/fail counts, one facet per year; `text` carries the
# film title for ggplotly().
bottom <- ggplot(
  btmfive,
  aes(x = binary, fill = clean_test, text = paste("title:", title))
) +
  geom_bar() +
  facet_grid(. ~ year) +
  scale_fill_brewer(palette = "BuGn") +
  theme(axis.text.x = element_text(angle = 45))
```

```{r}
# Render the 'bottom five' ggplot as an interactive plotly figure
ggplotly(p = bottom)
```

Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Ctrl+Alt+I*.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Ctrl+Shift+K* to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
